import pandas as pd
import plotly.express as px
33 Decoradores en Pandas
# Relative path to the Parquet file that is passed to lee_datos below.
f = '../../data/Temixco_2018_10Min.parquet'
def resumen_estadistico(funcion):
    """Decorate a DataFrame-returning function so its summary is printed.

    Every call to the decorated function runs ``funcion`` with the same
    arguments, prints ``df.describe()`` for the object it returned, and then
    returns that object unchanged.

    Args:
        funcion: Callable whose return value has a ``describe()`` method
            (a pandas DataFrame in this file).

    Returns:
        A wrapper that keeps the name and docstring of ``funcion``.
    """
    # Imported locally so the block does not rely on the file's import header.
    import functools

    # Without functools.wraps the decorated function would report itself as
    # "wrapper" and lose its docstring.
    @functools.wraps(funcion)
    def wrapper(*args, **kwargs):
        df = funcion(*args, **kwargs)
        print(df.describe())
        return df

    return wrapper
@resumen_estadistico
def lee_datos(f):
    """Read the Parquet file at ``f`` with pandas and return the result.

    The ``resumen_estadistico`` decorator prints the ``describe()`` summary
    of the returned frame on every call.
    """
    return pd.read_parquet(f)
# Load the data; the decorator on lee_datos also prints its describe() summary.
tmx = lee_datos(f)
tmx
Ib Ig To RH WS \
count 52423.000000 52423.000000 52560.000000 52560.000000 52560.00000
mean 236.742726 257.414344 22.838098 45.152827 1.90520
std 327.983721 345.976954 4.443339 19.426263 1.04411
min 0.000000 0.000000 8.160000 5.648000 0.05000
25% 0.001000 0.000000 19.350000 29.770000 1.18000
50% 0.260000 3.293000 22.670000 42.600000 1.78500
75% 542.300000 533.900000 26.030000 59.280000 2.46000
max 1021.000000 1348.000000 35.870000 97.700000 14.86000
WD P
count 52560.000000 52560.000000
mean 210.734453 87591.151598
std 109.276328 245.715965
min 0.000000 86772.650000
25% 134.675000 87429.500000
50% 211.900000 87595.090000
75% 319.800000 87760.647500
max 360.000000 88516.950000
| | Ib | Ig | To | RH | WS | WD | P |
|---|---|---|---|---|---|---|---|
| time | | | | | | | |
| 2018-01-01 00:00:00 | NaN | NaN | 18.70 | 36.34 | 1.422 | 316.0 | 87864.11 |
| 2018-01-01 00:10:00 | 0.002 | 0.0 | 18.95 | 35.29 | 1.008 | 283.7 | 87876.37 |
| 2018-01-01 00:20:00 | 0.170 | 0.0 | 18.94 | 35.43 | 1.565 | 326.0 | 87888.64 |
| 2018-01-01 00:30:00 | 0.371 | 0.0 | 18.77 | 35.89 | 2.175 | 354.5 | 87887.21 |
| 2018-01-01 00:40:00 | 0.305 | 0.0 | 18.81 | 36.34 | 1.902 | 348.0 | 87886.91 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-12-31 23:10:00 | 0.125 | 0.0 | 18.51 | 47.29 | 1.715 | 332.2 | 87484.32 |
| 2018-12-31 23:20:00 | 0.000 | 0.0 | 18.26 | 48.02 | 1.703 | 320.5 | 87470.70 |
| 2018-12-31 23:30:00 | 0.044 | 0.0 | 18.39 | 46.84 | 2.887 | 335.7 | 87455.03 |
| 2018-12-31 23:40:00 | 0.170 | 0.0 | 17.99 | 47.85 | 1.528 | 358.8 | 87470.02 |
| 2018-12-31 23:50:00 | 0.003 | 0.0 | 17.75 | 49.65 | 0.598 | 322.3 | 87467.29 |
52560 rows × 7 columns
def agregar_diff_To(funcion):
    """Decorate a DataFrame-returning function to add a ``To_diff`` column.

    Every call to the decorated function runs ``funcion`` with the same
    arguments, stores ``df['To'].diff()`` in a new ``To_diff`` column of the
    returned frame (modifying that frame in place), and returns it.

    Args:
        funcion: Callable returning a DataFrame that has a ``To`` column.

    Returns:
        A wrapper that keeps the name and docstring of ``funcion``.
    """
    # Imported locally so the block does not rely on the file's import header.
    import functools

    # Without functools.wraps the decorated function would report itself as
    # "wrapper" and lose its docstring.
    @functools.wraps(funcion)
    def wrapper(*args, **kwargs):
        df = funcion(*args, **kwargs)
        df['To_diff'] = df['To'].diff()
        return df

    return wrapper
def resumen_estadistico(funcion):
    """Decorate a DataFrame-returning function so its summary is printed.

    Every call to the decorated function runs ``funcion`` with the same
    arguments, prints ``df.describe()`` for the object it returned, and then
    returns that object unchanged.

    Args:
        funcion: Callable whose return value has a ``describe()`` method
            (a pandas DataFrame in this file).

    Returns:
        A wrapper that keeps the name and docstring of ``funcion``.
    """
    # Imported locally so the block does not rely on the file's import header.
    import functools

    # Without functools.wraps the decorated function would report itself as
    # "wrapper" and lose its docstring.
    @functools.wraps(funcion)
    def wrapper(*args, **kwargs):
        df = funcion(*args, **kwargs)
        print(df.describe())
        return df

    return wrapper
# Decorator order matters here. Decorators are applied bottom-up, so
# agregar_diff_To wraps lee_datos first and resumen_estadistico wraps that
# result. When lee_datos is called, the To_diff column is therefore added
# before the summary is printed, which is why To_diff appears in the summary.
@resumen_estadistico
@agregar_diff_To
def lee_datos(f):
    """Read the Parquet file at ``f`` with pandas and return the result."""
    return pd.read_parquet(f)
# Relative path to the Parquet file that is passed to lee_datos.
f = '../../data/Temixco_2018_10Min.parquet'
# Load the data through the doubly decorated reader defined above.
tmx = lee_datos(f)
tmx
Ib Ig To RH WS \
count 52423.000000 52423.000000 52560.000000 52560.000000 52560.00000
mean 236.742726 257.414344 22.838098 45.152827 1.90520
std 327.983721 345.976954 4.443339 19.426263 1.04411
min 0.000000 0.000000 8.160000 5.648000 0.05000
25% 0.001000 0.000000 19.350000 29.770000 1.18000
50% 0.260000 3.293000 22.670000 42.600000 1.78500
75% 542.300000 533.900000 26.030000 59.280000 2.46000
max 1021.000000 1348.000000 35.870000 97.700000 14.86000
WD P To_diff
count 52560.000000 52560.000000 52559.000000
mean 210.734453 87591.151598 -0.000018
std 109.276328 245.715965 0.438517
min 0.000000 86772.650000 -8.220000
25% 134.675000 87429.500000 -0.230000
50% 211.900000 87595.090000 -0.020000
75% 319.800000 87760.647500 0.220000
max 360.000000 88516.950000 4.280000
| | Ib | Ig | To | RH | WS | WD | P | To_diff |
|---|---|---|---|---|---|---|---|---|
| time | | | | | | | | |
| 2018-01-01 00:00:00 | NaN | NaN | 18.70 | 36.34 | 1.422 | 316.0 | 87864.11 | NaN |
| 2018-01-01 00:10:00 | 0.002 | 0.0 | 18.95 | 35.29 | 1.008 | 283.7 | 87876.37 | 0.25 |
| 2018-01-01 00:20:00 | 0.170 | 0.0 | 18.94 | 35.43 | 1.565 | 326.0 | 87888.64 | -0.01 |
| 2018-01-01 00:30:00 | 0.371 | 0.0 | 18.77 | 35.89 | 2.175 | 354.5 | 87887.21 | -0.17 |
| 2018-01-01 00:40:00 | 0.305 | 0.0 | 18.81 | 36.34 | 1.902 | 348.0 | 87886.91 | 0.04 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-12-31 23:10:00 | 0.125 | 0.0 | 18.51 | 47.29 | 1.715 | 332.2 | 87484.32 | -0.10 |
| 2018-12-31 23:20:00 | 0.000 | 0.0 | 18.26 | 48.02 | 1.703 | 320.5 | 87470.70 | -0.25 |
| 2018-12-31 23:30:00 | 0.044 | 0.0 | 18.39 | 46.84 | 2.887 | 335.7 | 87455.03 | 0.13 |
| 2018-12-31 23:40:00 | 0.170 | 0.0 | 17.99 | 47.85 | 1.528 | 358.8 | 87470.02 | -0.40 |
| 2018-12-31 23:50:00 | 0.003 | 0.0 | 17.75 | 49.65 | 0.598 | 322.3 | 87467.29 | -0.24 |
52560 rows × 8 columns
tmx
| | Ib | Ig | To | RH | WS | WD | P | To_diff |
|---|---|---|---|---|---|---|---|---|
| time | | | | | | | | |
| 2018-01-01 00:00:00 | NaN | NaN | 18.70 | 36.34 | 1.422 | 316.0 | 87864.11 | NaN |
| 2018-01-01 00:10:00 | 0.002 | 0.0 | 18.95 | 35.29 | 1.008 | 283.7 | 87876.37 | 0.25 |
| 2018-01-01 00:20:00 | 0.170 | 0.0 | 18.94 | 35.43 | 1.565 | 326.0 | 87888.64 | -0.01 |
| 2018-01-01 00:30:00 | 0.371 | 0.0 | 18.77 | 35.89 | 2.175 | 354.5 | 87887.21 | -0.17 |
| 2018-01-01 00:40:00 | 0.305 | 0.0 | 18.81 | 36.34 | 1.902 | 348.0 | 87886.91 | 0.04 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-12-31 23:10:00 | 0.125 | 0.0 | 18.51 | 47.29 | 1.715 | 332.2 | 87484.32 | -0.10 |
| 2018-12-31 23:20:00 | 0.000 | 0.0 | 18.26 | 48.02 | 1.703 | 320.5 | 87470.70 | -0.25 |
| 2018-12-31 23:30:00 | 0.044 | 0.0 | 18.39 | 46.84 | 2.887 | 335.7 | 87455.03 | 0.13 |
| 2018-12-31 23:40:00 | 0.170 | 0.0 | 17.99 | 47.85 | 1.528 | 358.8 | 87470.02 | -0.40 |
| 2018-12-31 23:50:00 | 0.003 | 0.0 | 17.75 | 49.65 | 0.598 | 322.3 | 87467.29 | -0.24 |
52560 rows × 8 columns
def grafica_Ig_Ib(tmx):
    """Build a Plotly Express line figure of the ``Ib`` and ``Ig`` columns.

    The index of ``tmx`` is used for the x axis. Returns the figure created
    by ``px.line``.
    """
    series = ['Ib', 'Ig']
    return px.line(tmx, x=tmx.index, y=series)
grafica_Ig_Ib(tmx)
33.1 Actividades sugeridas:
- Haz un decorador que valide si existen las columnas Ig e Ib y si no existe alguna de las dos, imprima las columnas existentes en el df.
Si ya lo hiciste pero quieres ver una solución, expande la celda siguiente:
def validar_columnas_ig_ib(func):
    """Decorate a function so it only runs when ``Ig`` and ``Ib`` exist.

    The wrapper inspects ``df.columns`` of its first argument. If either
    ``Ig`` or ``Ib`` is missing it prints the missing names and the columns
    that do exist, then returns ``None`` without calling ``func``. Otherwise
    it forwards all arguments to ``func`` and returns its result.

    Args:
        func: Callable whose first positional argument is a DataFrame.

    Returns:
        A wrapper that keeps the name and docstring of ``func``.
    """
    # Imported locally so the block does not rely on the file's import header.
    import functools

    # Without functools.wraps the decorated function would report itself as
    # "wrapper" and lose its docstring.
    @functools.wraps(func)
    def wrapper(df, *args, **kwargs):
        required_columns = ['Ig', 'Ib']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Columnas faltantes: {missing_columns}")
            print("Columnas existentes en el DataFrame:", df.columns.tolist())
            # Explicit None: the wrapped function is deliberately skipped.
            return None
        return func(df, *args, **kwargs)

    return wrapper
@validar_columnas_ig_ib
def grafica_Ig_Ib(tmx):
    """Build a Plotly Express line figure of the ``Ib`` and ``Ig`` columns.

    The index of ``tmx`` is used for the x axis. The
    ``validar_columnas_ig_ib`` decorator checks for both columns before this
    body runs.
    """
    series = ['Ib', 'Ig']
    return px.line(tmx, x=tmx.index, y=series)
grafica_Ig_Ib(tmx)
34 Actividades sugeridas:
- Haz el clásico decorador que mide el tiempo del proceso.
- Haz un decorador que diga qué función estás ejecutando y qué argumentos estás recibiendo.
- Revisa el paquete https://github.com/groodt/retrying para ver cómo usar decoradores ya definidos.